{   Unit with core algorithms.

    Copyright (C) 2011-2012  Matthias Karbe

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; see COPYING.txt.
    if not, see <http://www.gnu.org/licenses/> or
    write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
}

{DOC>> This unit contains core algorithms}

unit corealg;

{$mode objfpc}{$H+}

interface

uses SysUtils, commontl, ucfs;

{DOC>> permutation using the gf(2^16) polynom 0x10001111010011001,
       called MAS3, adding Length and padded 16bit chunks of buffer,
       multiplying it by 3 (MAS3 finite Field arithmetic). @br @br

       MAS3HASH is used for scrambling (permutation), its hashing
       capatibilitys are (mostly) untested,
       thought up til now it performs quite well (in collisions).
       @br @br

       All Positions and Chars contribute to the final Number.
       The value calculated, may be 4 or 8 byte (32/64bit).
       Hashes are shuffled and rotated in such a way, that bits are well
       distributed near MSB for both, long and short strings.
       This is important for the THashTrie (coreobj). Otherwise
       (a lot of 0 near MSB) will create critical bits mostly near
       LSB and probably near MSB for longer strings, which creates
       longer lookup pathes for shorter strings. @br @br

       MAS3 was autogenerated (tested and selected from all primes below
       2^16) and was not taken from any Documentation,
       Code and/or other Source.}
function mas3hash( const buf; len: MachineInt ): MachineWord; inline;

{DOC>> specialized mas3 for PUCFS32String, 0 length string hashes
       to mas3hash_len0.}
function mas3hash_sigma( pstr: PUCFS32String ): MachineWord; inline;
function mas3hash_sigma_s( const s: String ): MachineWord; inline; deprecated;
function mas3hash_len0: MachineWord; inline;

implementation

{$MACRO ON}

{$IF SizeOf(MachineWord) = 4}
  {check for Ror intrinsic (32bit)}
  {$IF DECLARED(RorDWord)}
    {$DEFINE rotate_right := RorDWord}
  {$ENDIF}
{$ELSEIF SizeOf(MachineWord) = 8}
  {check for Ror intrinsic (64bit)}
  {$IF DECLARED(RorDWord)}
    {$DEFINE rotate_right := RorQWord}
  {$ENDIF}
{$ENDIF}

{no ror intrinsic -> do it pure}
{$IF NOT DEFINED(rotate_right)}
function rotate_right( val: MachineWord; rval: Byte ): MachineWord; inline;
begin
  ASSERT(rval < SizeOf(MachineWord)*8);
  Result := (val shr MachineWord(rval)) or
            (val shl MachineWord(SizeOf(MachineWord)*8 - rval));
end;
{$ENDIF}

const
  msb16 = 1 shl 15;
  rpoly = { (reduced poly) $1E99 = x^12+x^11+x^10+x^9+x^7+x^4+x^3+1}
          1 or (1 shl 3) or (1 shl 4) or (1 shl 7) or
          (1 shl 9) or (1 shl 10) or (1 shl 11) or (1 shl 12);

function gf16_mul03( gf: MachineWord ): MachineWord; inline;
{multiply-2,add}
begin
{$PUSH}
{$R-}
  if ( gf and msb16 ) > 0 then
    Result := ( gf shl 1 ) xor rpoly xor gf
  else
    Result := ( gf shl 1 ) xor gf;
{$POP}
end;

function mas3_lastpermute( h: MachineWord ): MachineWord; inline;
{last permutation round (permute for short string shuffling)}
var i: MachineInt;
begin
  for i := 0 to 7 do
    h := rotate_right(h xor gf16_mul03(h),8);
  Result := h;
end;


function mas3_step16( h, cnt, block16: MachineWord ): MachineWord; inline;
{one hash step, takes previous hash h, position cnt and a 16bit block block16
 adding hash/position and block16*3, roling for next input}
begin
  Result := rotate_right(cnt xor gf16_mul03(h xor block16),7);
end;

function mas3hash( const buf; len: MachineInt ): MachineWord;
{possibly worst hash... simple permutation using a gf(2^16) poly and
 multiplication with 3 (x^1+1) and 2 byte from the string
 in said polynom bound ring}
var
  bufp: PByte;
  i: MachineInt;

begin
  bufp := PByte(@Buf);
  Result := 0;
  i := 1;
  if len > 1 then
    begin
      repeat
        Result := mas3_step16(Result,i,bufp^);
        Inc(i,1);
        Inc(bufp,1);
      until i >= len;
    end;
  {permute again, makes it look more hashy}
  Result := mas3_lastpermute(Result);
end;

function mas3hash_sigma( pstr: PUCFS32String ): MachineWord;
{PUCFS32String variant for hashing,
  -> does same lastpermute round, so length0 hash is the same}
var
  i: MachineInt;
  uc: TUCFS32Char;
begin
  Result := 0;
  if ucfs_length(pstr) > 0 then
    begin
      i := 1;
      repeat
        uc := ucfs_getc(pstr,i);
        Result := mas3_step16(Result,i,uc);
        Inc(i,1);
      until i > ucfs_length(pstr);
    end;
  {permute again -> same as byte hash, so 0 will yield same hash}
  Result := mas3_lastpermute(Result);
end;

var
  mas3hash_len0_val: MachineWord;

function mas3hash_sigma_s(const s: String): MachineWord;
var tmps: PUCFS32String;
begin
  tmps := ucfs_utf8us(s);
  Result := mas3hash_sigma(tmps);
  ucfs_release(tmps);
end;

function mas3hash_len0: MachineWord;
begin
  Result := mas3hash_len0_val;
end;

initialization
  mas3hash_len0_val := 0;
  mas3hash_len0_val := mas3hash_sigma(nil);

end.

